# Clear every object from the current workspace before running the script.
rm(list = ls())

# Make the folder that holds this script the working directory; the raw data
# files are read relative to it.
# NOTE(review): the path comes from rstudioapi's active document context, so
# this presumably only works when the script is run from RStudio -- confirm.
setwd(
  dirname(
    rstudioapi::getActiveDocumentContext()$path
  )
)

# Print the working directory so it can be checked by eye.
getwd()

# Install pacman if it cannot be loaded, then use it to load the analysis
# packages. The package order is kept as-is because it determines which
# package masks which function name.
if (!require("pacman")) {
  install.packages("pacman")
}
pacman::p_load(
  tidyr, dplyr, lmerTest, stargazer,
  mfx, ggplot2, ggpubr, reshape2
)

# Load the raw Qualtrics export (path is relative to the working directory).
df <- read.csv(
  file = "RawData/Evasion study Sender Final - Experiment 2_November 5, 2019_10.33.csv",
  header = TRUE,
  sep = ","
)

# The first two data rows are extra header rows written by Qualtrics; keep
# only the rows after them.
df <- df %>%
  filter(row_number() > 2)

n <- nrow(df) # N = 1207

# Give the Qualtrics question columns readable names and add a running
# subject id (row position at this point in the script).
df <- df %>%
  dplyr::rename(
    Prolific_id = Q37,
    Duration = Duration..in.seconds.,
    Control_q1 = Q12, errors_q1 = Q13,
    Control_q2 = Q14, errors_q2 = Q15,
    Control_q3 = Q16, errors_q3 = Q17,
    Control_q4 = Q18, errors_q4 = Q19,
    Control_q5 = Q20, errors_q5 = Q21,
    Control_q6 = Q42, errors_q6 = Q44,
    decision = Q24,
    belief_1 = Q30, belief_2 = Q39, belief_3 = Q31,
    gender = Q33, gender_other = Q33_3_TEXT,
    age = Q34_1_TEXT,
    education = Q46, education_other = Q46_7_TEXT,
    condition = Name
  ) %>%
  dplyr::mutate(sub_id = row_number())


# Report repeated Prolific ids, then keep only the first row for each id.
summary(duplicated(df$Prolific_id)) # TRUE = 2
df <- subset(df, !duplicated(Prolific_id))
# N = 1205

# Tabulate the completion flag, then keep finished responses that did not
# come from the survey preview channel.
table(df$Finished) # FALSE = 0
df <- subset(df, Finished == 1 & DistributionChannel != "preview")
# N = 1204


#------------------------------
# CREATE AND EDIT VARS
#------------------------------

# Keep only the columns used in the analysis.
evasion_S2 <- df %>%
  dplyr::select(
    Duration, ResponseId,
    Control_q1, errors_q1,
    Control_q2, errors_q2,
    Control_q3, errors_q3,
    Control_q4, errors_q4,
    Control_q5, errors_q5,
    Control_q6, errors_q6,
    decision,
    belief_1, belief_2, belief_3,
    gender, gender_other, age, education, education_other,
    condition, Prolific_id, Spinner_Outcome, X, Z, sub_id
  )

# Convert the coded answers to numbers.
# Go through as.character() first: when read.csv() has turned the columns into
# factors (its default before R 4.0), as.numeric() on the factor would return
# the internal level codes instead of the recorded values. On character
# columns the extra step changes nothing.
numeric_cols <- c(
  "belief_1", "belief_2", "belief_3",
  "decision", "gender", "education"
)
evasion_S2[numeric_cols] <- lapply(
  evasion_S2[numeric_cols],
  function(x) as.numeric(as.character(x))
)

# Derive the treatment and decision variables, then order rows by condition.
evasion_S2 <- evasion_S2 %>%
  mutate(
    # Short treatment code for each condition label. Any non-missing label
    # that is not listed falls through to "c_idk"; a missing label stays NA.
    treat = dplyr::case_when(
      is.na(condition) ~ NA_character_,
      condition == "Treatment I don't know" ~ "t_idk",
      condition == "Treatment silence" ~ "t_s",
      condition == "Treatment Half-truth" ~ "t_ht",
      condition == "Control silence" ~ "c_s",
      condition == "Control Half-truth" ~ "c_ht",
      TRUE ~ "c_idk"
    ),
    # decision == 1 is labelled "truth", every other value "lie".
    decision_code = ifelse(decision == 1, "truth", "lie")
  ) %>%
  # Rows without a recorded decision are removed.
  drop_na(decision) %>%
  mutate(
    # Pool the three control arms into one "control" group; treatment arms
    # keep their own code.
    treat_pool = ifelse(treat %in% c("c_s", "c_ht", "c_idk"), "control", treat),
    # 1/0 indicator for a "lie" decision.
    lie = as.numeric(decision_code == "lie")
  ) %>%
  arrange(condition)

# Remove factor levels that no longer occur in the data.
evasion_S2 <- droplevels(evasion_S2)

# Age is kept as text here so that written-out answers can be matched later.
evasion_S2$age <- as.character(evasion_S2$age)


# Build the final analysis dataset with cleaned demographics and labels.
df_S2 <- evasion_S2 %>%
  mutate(
    # "-99" becomes missing; two ages written out in words are replaced by
    # their digits; everything else is kept as entered.
    age_clean = dplyr::case_when(
      age == "-99" ~ NA_character_,
      age == "forty seven" ~ "47",
      age == "fifty-two" ~ "52",
      TRUE ~ age
    ),
    # gender code 1 -> 1, code 4 -> NA, any other recorded code -> 0.
    female = dplyr::case_when(
      is.na(gender) ~ NA_real_,
      gender == 1 ~ 1,
      gender == 4 ~ NA_real_,
      TRUE ~ 0
    )
  ) %>%
  mutate(
    # Education codes: 10 and above -> "high", 9 or 1 -> "medium_low",
    # 7 -> "other", anything else (including missing) -> NA.
    educ_recode = dplyr::case_when(
      education >= 10 ~ "high",
      education == 9 | education == 1 ~ "medium_low",
      education == 7 ~ "other",
      TRUE ~ NA_character_
    )
  ) %>%
  # 1/0 indicator for the "high" group; NA where educ_recode is NA.
  mutate(educ_high = as.numeric(educ_recode == "high")) %>%
  dplyr::rename(
    b_red_after_X = belief_1,
    b_red_after_BLUE = belief_2,
    b_liars = belief_3
  ) %>%
  mutate(
    # Pooled control rows are labelled "direct_lie", all others "evasion".
    treat_pool_bin = ifelse(treat_pool == "control", "direct_lie", "evasion"),
    experiment = "open"
  )

# Store the cleaned age as a number.
df_S2$age_clean <- as.numeric(df_S2$age_clean)


# -------- Save the cleaned dataset ----------

# R binary format. For a dated file name, the earlier variant was:
#filename <- paste("Data-Evasion-S1", Sys.Date(), ".Rdata", sep="")
filename <- "Data-Evasion-S2.Rdata"
save(list = "df_S2", file = filename)

# Comma-separated copy of the same data frame.
write.csv(df_S2, file = "Data-Evasion-S2.csv")
